home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Collection of Tools & Utilities
/
Collection of Tools and Utilities.iso
/
tex
/
gestalt.zip
/
SIMIL_C.ASM
< prev
next >
Wrap
Assembly Source File
|
1988-12-06
|
13KB
|
336 lines
;From the article "Pattern Matching - The Gestalt Approach"
;by John W. Ratcliff and David E. Metzener
;Dr. Dobbs Journal, July 1988
;Typed in 6 Dec 88, and very slightly tweaked (see the 'TH' comments).
; (I don't have a C compiler, so couldn't test this particular code.
; See other files for assembly language program and Turbo Pascal
; compatible procedures.)
;
; David Kirschbaum
; Toad Hall
; kirsch@braggvax.ARPA
TITLE SIMIL.ASM written by John W. Ratcliff and David E. Metzener
; November 10, 1987
; Uses the Ratcliff/Obershelp pattern recognition algorithm.
; This program provides a new function to C on an 8086 based machine.
; The function SIMIL returns a percentage value corresponding to how
; alike any two strings are. Be certain to upper case the two strings
; passed if you are not concerned about case sensitivity.
; NOTE: This routine is for SMALL model only. As an exercise for
; the student, feel free to convert it to LARGE.
_TEXT SEGMENT BYTE PUBLIC 'CODE'
_TEXT ENDS
CONST SEGMENT WORD PUBLIC 'CONST'
CONST ENDS
_BSS SEGMENT WORD PUBLIC 'BSS'
_BSS ENDS
_DATA SEGMENT WORD PUBLIC 'DATA'
_DATA ENDS
DGROUP GROUP CONST, _BSS, _DATA
ASSUME CS:_TEXT, DS:DGROUP, SS:DGROUP, ES:DGROUP
_DATA SEGMENT
ststr1L dw 25 dup(?) ;contains lefts for string 1
ststr1R dw 25 dup(?) ;contains rights for string 1
ststr2L dw 25 dup(?) ;contains lefts for string 2
ststr2R dw 25 dup(?) ;contains rights for string 2
stcknum dw ? ;number of elements on the stack
score dw ? ;the # of chars in common times 2
total dw ? ;total # of chars in string 1 and 2
cL1 dw ? ;left of string 1 found in common
cR1 dw ? ;right of string 1 found in common
cL2 dw ? ;left of string 2 found in common
cR2 dw ? ;right of string 2 found in common
s2ed dw ? ;the end of string 2 used in compare
_DATA ENDS
public _simil
_TEXT SEGMENT
_simil proc near
; This routine expects pointers passed in two character strings, null
; terminated, that you wish compared. It returns a percentage value
; from 0 to 100% corresponding to how alike the two strings are.
; +4 +6
; usage: simil(char *str1,char *str2)
; The similarity routine is composed of three major components
; pushst --- pushes a string section to be compared on the stack
; popst --- pops a string section to be examined off of the stack
; compare --- finds the largest group of characters in common between
; any two string sections
; The similarity routine begins by computing the total length of both
; strings passed and placing that value in TOTAL. It then takes
; the beginning and ending of both strings passed and pushes them on
; the stack. It then falls into the main line code.
; The original two strings are immediately popped off of the stack and
; are passed to the compare routine. The compare routine will find the
; largest group of characters in common between the two strings.
; The number of characters in common is multiplied times two and added
; to the total score. If there were no characters in common, then there
; is nothing to push onto the stack. If there are exactly one character
; to the left in both strings, then we needn't push it on the stack.
; (We already know they aren't equal from the previous call to compare.)
; Otherwise the characters to the left are pushed onto the stack. These
; same rules apply to characters to the right of the substring found in
; common. This process of pulling substrings off of the stack, comparing
; them, and pushing remaining sections on the stack is continued until
; the stack is empty. On return the total score is divided by the
; number of characters in both strings. This is multiplied times 100 to
; yield a percentage. This percentage similarity is returned to the
; calling procedure.
push bp ;save BP reg.
mov bp,sp ;save SP reg in BP for use in program
push ES ;save the ES seg register
mov ax,DS ;copy DS segment reg to ES
mov ES,ax
xor ax,ax ;zero out AX for clearing of SCORE var
mov score,ax ;zero out SCORE
mov stcknum,ax ;initialize number of stack entries to 0
mov si,[bp+4] ;move beginning pointer of string 1 to SI
mov di,[bp+6] ;move beginning pointer of string 2 to SI
cmp [si],al ;is it a null string?
je StrErr ;can't process null strings
cmp [di],al ;is it a null string?
jne DoCmp ;Neither is a null string, so process them
StrErr: jmp Donit ;exit routine
DoCmp: push di ;save DI because of SCAS opcode
push si ;save SI because of SCAS opcode
xor al,al ;clear out AL to search for end of string
cld ;set direction flag forward
mov cx,-1 ;make sure we repeat the correct # of times
repnz scasb ;scan for string delimiter in string 2
dec di ;point DI to '$00' byte of string 2
dec di ;point DI to last char of string 2
mov bp,di ;move DI to BP where it is supposed to be
pop di ;restore SI into DI for SCAS (string 1)
repnz scasb ;scan for string delimiter in string 1
not cx ;do one's complement for correct length of st
sub cx,2 ;subtract the two zero bytes at the end of st
mov total,cx ;store string 2's length
dec di ;point DI to '$00' byte of string 1
dec di ;point DI to last char of string 1
mov bx,di ;move DI to BX where it is supposed to be
pop di ;restore DI to what it should be
call PushSt ;push values for the first call to SIMILIARITY
Main: cmp stcknum,0 ;is there anything on the stack?
je Done ;no, then all done!
call PopSt ;get regs set up for a compare call
call Compare ;do compare for this substring set
;TH cmp dx,0 ;if nothing in common then nothing to push
;TH je Main ;try another set
or dx,dx ;if nothing in common then nothing to push TH
jz Main ;try another set TH
shl dx,1 ;*2 for add to score
add score,dx ;add into score
mov bp,stcknum ;get number of entry I want to look at
shl bp,1 ;get AX ready to access string stacks
mov si,[ststr1L+bp] ;move L1 into SI or L1
mov bx,cL1 ;move CL1 into BX or R1
mov di,[ststr2L+bp] ;move L2 into DI or L2
mov cx,cL2 ;move CL2 into CX temporarily
mov ax,[ststr1R+bp] ;get old R1 off of stack
mov cL1,ax ;place in CL1 temporarily
mov ax,[ststr2R+bp] ;get old R2 off of stack
mov cL2,ax ;save in CL2 temporarily
mov bp,cx ;place CL2 into BP
cmp bx,si ;compare CL1 to L1
je Chrght ;if zero, then nothing on left side string 1
cmp bp,di ;compare CL2 to L2
je Chrght ;if zero, then nothing on left side string 2
dec bx ;point to last part of left side string 1
dec bp ;point to last part of left side string 2
cmp bx,si ;only one char to examine?
jne PushIt ;no->we need to examine this
cmp bp,di ;only one char in both?
je Chrght ;nothing to look at if both only contain 1 char
PushIt: call PushSt ;push left side on stack
Chrght: mov si,cR1 ;move CR1 into SI or L1
mov bx,cL1 ;move R1 into BX or R1
mov di,cR2 ;move CR2 into DI or L2
mov bp,cL2 ;move R2 into BP or R2
cmp si,bx ;compare CR1 to R1
je Main ;If zero, then nothing on right side string 1
cmp di,bp ;compare CR2 to R2
je Main ;if zero, then nothing on right side string 2
inc si ;point to last part of right side string 1
inc di ;point to last part of right side string 2
cmp bx,si ;only one char to examine?
jne Push2 ;no-> examine it
cmp bp,di ;only 1 char to examine in both?
je Main ;yes-> get next string off of stack
Push2: call PushSt ;push right side on stack
jmp short Main ;do next level of compares
Done: mov ax,score ;get score into AX for MUL
mov cx,100 ;get 100 into CX for MUL
mul cx ;multiply by 100
mov cx,total ;get total chars for divide
div cx ;divide by total
Donit: pop es ;restore ES to entry value
pop bp ;restore BP back to entry value
ret ;leave with AX holding % similarity
_simil endp
Compare proc near
; The compare routine locates the largest group of characters between string 1
; and string 2. This routine assumes that the direction flag is clear.
; Pass to this routine:
; BX = R1 (right side of string 1)
; DS:SI = L1 (left side of string 1)
; ES